Share Local Media - Direct Mail Analytics¶

Phase 2: Analysis¶

In [1]:
import pandas as pd
import plotly.express as px

# Load enriched data
df = pd.read_csv('Dataset/direct_mail_campaigns_enriched.csv')
print(df.head())
   Campaign_ID                brand Campaign_Type Target_Audience Duration  \
0            1  Innovate Industries         Email       Men 18-24  30 days   
1            2       NexGen Systems         Email     Women 35-44  60 days   
2            3    Alpha Innovations    Influencer       Men 25-34  30 days   
3            4   DataTech Solutions       Display        All Ages  60 days   
4            5       NexGen Systems         Email       Men 25-34  15 days   

  Channel_Used     Location  Conversion_Rate Acquisition_Cost   ROI  \
0   Google Ads      Chicago             0.04       $16,174.00  6.29   
1   Google Ads     New York             0.12       $11,566.00  5.61   
2      YouTube  Los Angeles             0.07       $10,200.00  7.18   
3      YouTube        Miami             0.11       $12,724.00  5.55   
4      YouTube  Los Angeles             0.05       $16,452.00  6.50   

   audience_size_mailed  total_cost  responses      revenue        cpa  roas  \
0                329582   214228.30  10546.624  1561724.307  20.312500  7.29   
1                481416   312920.40  46215.936  2068403.844   6.770833  6.61   
2                431181   280267.65  24146.136  2292589.377  11.607143  8.18   
3                111938    72759.70   9850.544   476576.035   7.386364  6.55   
4                262805   170823.25  10512.200  1281174.375  16.250000  7.50   

   response_rate  
0          0.032  
1          0.096  
2          0.056  
3          0.088  
4          0.040  

Summary Tables: CPA/ROAS by Channel and Location¶

In [2]:
# CPA/ROAS by Channel
channel_summary = df.groupby('Channel_Used')[['cpa', 'roas']].mean().reset_index()
print("Channel Summary:")
print(channel_summary)

# CPA/ROAS by Location
location_summary = df.groupby('Location')[['cpa', 'roas']].mean().reset_index()
print("\nLocation Summary:")
print(location_summary.head())
Channel Summary:
  Channel_Used        cpa      roas
0        Email  16.029992  5.996487
1     Facebook  16.021051  6.018699
2   Google Ads  16.211453  6.003141
3    Instagram  16.175276  5.988706
4      Website  16.079477  6.014167
5      YouTube  16.234669  5.993754

Location Summary:
      Location        cpa      roas
0      Chicago  16.161203  6.001590
1      Houston  16.094832  6.007203
2  Los Angeles  16.159589  6.010876
3        Miami  16.128510  6.012282
4     New York  16.083450  5.980228

Market Tiers¶

In [3]:
# Bucket Locations by CPA percentile
# We calculate average CPA per location to classify the location itself
location_perf = df.groupby('Location')['cpa'].mean().reset_index()
cpa_33 = location_perf['cpa'].quantile(0.33)
cpa_66 = location_perf['cpa'].quantile(0.66)

def get_tier(cpa):
    if cpa <= cpa_33: return 'Top' # Lower CPA is better
    elif cpa <= cpa_66: return 'Middle'
    else: return 'Lower'

location_perf['Market_Tier'] = location_perf['cpa'].apply(get_tier)

# Merge back to main df so each campaign has its location's tier
df = df.merge(location_perf[['Location', 'Market_Tier']], on='Location')
print(df[['Location', 'Market_Tier']].drop_duplicates().head())
           Location Market_Tier
0           Chicago       Lower
40010      New York         Top
80034   Los Angeles       Lower
119981        Miami      Middle
160250      Houston         Top

Visualizations¶

In [4]:
# 1. Bar: Avg CPA by Channel_Used
fig1 = px.bar(channel_summary, x='Channel_Used', y='cpa', title='Average CPA by Channel')
fig1.show()

# 2. Bar: Avg ROAS by Market Tier
tier_summary = df.groupby('Market_Tier')['roas'].mean().reset_index()
# Order tiers logically
tier_order = {'Top': 1, 'Middle': 2, 'Lower': 3}
tier_summary['order'] = tier_summary['Market_Tier'].map(tier_order)
tier_summary = tier_summary.sort_values('order')

fig2 = px.bar(tier_summary, x='Market_Tier', y='roas', title='Average ROAS by Market Tier')
fig2.show()

# 3. Scatter: Audience Size vs CPA
fig3 = px.scatter(df, x='audience_size_mailed', y='cpa', size='revenue', color='Channel_Used', 
                  title='Audience Size vs CPA', hover_data=['Location', 'Market_Tier'])
fig3.show()